library(ggplot2)
library(RRreg)
library(ggpubr)
library(boot)
library(estimatr)
library(gridExtra)
library(stargazer)
library(ordinal)

rm(list = ls())

#####################################################################################################################
# Replication code for "Messaging About Corruption: The Power of Social Norms", Governance
# Author: Mattias Agerberg, University of Gothenburg, mattias.agerberg@gu.se
# MAIN MANUSCRIPT + APPENDIX

#####################################################################################################################
# STUDY 2

#####################################################################################################################
# Data, study 2
data2 <- read.csv("C:/Users/maage/Dropbox/2021/Latin America Project/Submissions/GOV/RnR/Final/Replication/study2-data.csv") 

#####################################################################################################################
# WD
setwd("") 

#####################################################################################################################
# FIGURE 2 + analysis: beliefs about others, study 2
#####################################################################################################################
# Prepost histogram
pre <- ggplot(data2[!is.na(data2$bribeothers.post) & !is.na(data2$bribeothers),], aes(x=bribeothers)) +
  geom_histogram(aes(y = (..count..)/sum(..count..)), colour="black", fill="grey20", alpha = 0.75, 
                 breaks = c(0,10,20,30,40,50,60,70,80,90,100)) + 
  labs(x="", y="", title = "Pre treatment") +
  ylim(0,0.44) +
  theme_minimal(base_size = 7)

post <- ggplot(data2[!is.na(data2$bribeothers.post) & !is.na(data2$bribeothers),], aes(x=bribeothers.post)) +
  geom_histogram(aes(y = (..count..)/sum(..count..)), colour="black", fill="grey20", alpha = 0.75, 
                 breaks = c(0,10,20,30,40,50,60,70,80,90,100)) + 
  labs(x="", y="", title = "Post treatment") +
  ylim(0,0.44) +
  theme_minimal(base_size = 7)

# Combine graphs
list.combined <- ggarrange(pre, post,
                           ncol = 2, nrow = 1, common.legend = TRUE, legend = "bottom")
list.combined <- annotate_figure(list.combined, 
                                 left = text_grob("Density",size = 8,rot = 90,hjust = 0.05,vjust = 1),
     bottom = text_grob("Believed share of other people saying that accepting a bribe is never justifiable",
                        size = 8))
ggsave("figure2.tiff",width=14,height=8, units = "cm", dpi = 800, device = "tiff")

# Observations
length(data2$bribeothers.post[!is.na(data2$bribeothers.post)])
length(data2$bribeothers.post[data2$bribeothers.post==94&!is.na(data2$bribeothers.post)])/
  length(data2$bribeothers.post[!is.na(data2$bribeothers.post)])

# Majority/minority
length(data2$bribeothers[data2$bribeothers<=50&!is.na(data2$bribeothers.post)])/
  length(data2$bribeothers.post[!is.na(data2$bribeothers.post)])

length(data2$bribeothers.post[data2$bribeothers.post<=50&!is.na(data2$bribeothers.post)])/
  length(data2$bribeothers.post[!is.na(data2$bribeothers.post)])

# T-test
t.test(data2$bribeothers.post[!is.na(data2$bribeothers.post)], data2$bribeothers[!is.na(data2$bribeothers.post)], 
       paired = TRUE, alternative = "two.sided")

# Mean values
mean(data2$bribeothers, na.rm = T)

# Certain, treatment
data2$certain <- factor(data2$certain,levels = c("Very uncertain", "Quite uncertain", "Quite certain", "Very certain"))
data2$certain.post <- factor(data2$certain.post,levels = c("Very uncertain", "Quite uncertain", "Quite certain", "Very certain"))
wilcox.test(as.numeric(data2$certain[!is.na(data2$certain.post)]), as.numeric(data2$certain.post[!is.na(data2$certain.post)]),
            paired = TRUE, alternative = "two.sided")

mean(as.numeric(data2$certain[!is.na(data2$certain.post)]),na.rm = T)
mean(as.numeric(data2$certain.post[!is.na(data2$certain.post)]),na.rm = T)

#####################################################################################################################
# Treatment effects
#####################################################################################################################
# FIGURE 3, main manuscript

# Estimates
bribe.reg <- lm(bribeothers.combined~treatment, data = data2)
trust.reg <- glm(trust~treatment, data = data2, family = "binomial")
culture.reg <- glm(not.corrculture~treatment, data = data2, family = "binomial")
willing.reg <- RRlog(crosswise.bribe~treatment, data = data2, model = "Crosswise", p=0.25)

# Predictions
data.control <- data.treat <- data2[1,]
data.treat[,"treatment"] <- 1
data.control[,"treatment"] <- 0

trust.pred.treat <- predict.glm(trust.reg, type = "response", newdata = data.treat, se.fit = T)
trust.pred.control <- predict.glm(trust.reg, type = "response", newdata = data.control, se.fit = T)
culture.pred.treat <- predict.glm(culture.reg, type = "response", newdata = data.treat, se.fit = T)
culture.pred.control <- predict.glm(culture.reg, type = "response", newdata = data.control, se.fit = T)
willing.pred.treat <- predict(willing.reg, type = "attribute", newdata = data.treat, se.fit = T)
willing.pred.control <- predict(willing.reg, type = "attribute", newdata = data.control, se.fit = T)

# Trust graph
pred.mf <- matrix(nrow = 2,ncol = 3)
pred.mf[1,1] <- trust.pred.treat$fit[[1]]
pred.mf[1,2] <- trust.pred.treat$fit[[1]] - trust.pred.treat$se.fit[[1]]*1.96
pred.mf[1,3] <- trust.pred.treat$fit[[1]] + trust.pred.treat$se.fit[[1]]*1.96
pred.mf[2,1] <- trust.pred.control$fit[[1]]
pred.mf[2,2] <- trust.pred.control$fit[[1]] - trust.pred.control$se.fit[[1]]*1.96
pred.mf[2,3] <- trust.pred.control$fit[[1]] + trust.pred.control$se.fit[[1]]*1.96
pred.mf <- data.frame(pred.mf)
pred.mf$est <- c("Treatment", "Control")
pred.mf$est <- factor(pred.mf$est)
pred.mf$est <-  factor(pred.mf$est,levels(pred.mf$est)[c(2,1)])

trust.gr <- ggplot(pred.mf,aes(x=est,y=X1,colour=est)) +
  geom_errorbar(aes(ymin=X2, ymax=X3), width=.1,position = position_dodge(width = 0.2), size = 0.4) +
  geom_point(position = position_dodge(width = 0.2),size=2) +
  labs(title="Interpersonal trust",x="",colour="",y="") +
  scale_y_continuous(limits=c(mean(data2$trust, na.rm = T)-0.1,mean(data2$trust, na.rm = T)+0.1)) +
  theme_minimal(base_size = 9) + scale_colour_grey(start=0.2, end=0.5) + 
  theme(legend.position="none", axis.text.x = element_text(size=7), axis.title.y = element_text(size=6, vjust = 1), 
        axis.text.y = element_text(size=5.5), plot.title = element_text(size=8))

# Culture graph
pred.mf <- matrix(nrow = 2,ncol = 3)
pred.mf[1,1] <- culture.pred.treat$fit[[1]]
pred.mf[1,2] <- culture.pred.treat$fit[[1]] - culture.pred.treat$se.fit[[1]]*1.96
pred.mf[1,3] <- culture.pred.treat$fit[[1]] + culture.pred.treat$se.fit[[1]]*1.96
pred.mf[2,1] <- culture.pred.control$fit[[1]]
pred.mf[2,2] <- culture.pred.control$fit[[1]] - culture.pred.control$se.fit[[1]]*1.96
pred.mf[2,3] <- culture.pred.control$fit[[1]] + culture.pred.control$se.fit[[1]]*1.96
pred.mf <- data.frame(pred.mf)
pred.mf$est <- c("Treatment", "Control")
pred.mf$est <- factor(pred.mf$est)
pred.mf$est <-  factor(pred.mf$est,levels(pred.mf$est)[c(2,1)])

culture.gr <- ggplot(pred.mf,aes(x=est,y=X1,colour=est)) +
  geom_errorbar(aes(ymin=X2, ymax=X3), width=.1,position = position_dodge(width = 0.2), size = 0.4) +
  geom_point(position = position_dodge(width = 0.2),size=2) +
  labs(title="Corruption not part of culture",x="",colour="",y="") +
  scale_y_continuous(limits=c(mean(data2$not.corrculture, na.rm = T)-0.1,
                              mean(data2$not.corrculture, na.rm = T)+0.1)) +
  theme_minimal(base_size = 9) + scale_colour_grey(start=0.2, end=0.5) + 
  theme(legend.position="none", axis.text.x = element_text(size=7), axis.title.y = element_text(size=6, vjust = 1), 
        axis.text.y = element_text(size=5.5), plot.title = element_text(size=8))

# Willing graph
pred.mf <- matrix(nrow = 2,ncol = 3)
pred.mf[1,1] <- willing.pred.treat[[1]]
pred.mf[1,2] <- willing.pred.treat[[3]]
pred.mf[1,3] <- willing.pred.treat[[4]]
pred.mf[2,1] <- willing.pred.control[[1]]
pred.mf[2,2] <- willing.pred.control[[3]]
pred.mf[2,3] <- willing.pred.control[[4]]
pred.mf <- data.frame(pred.mf)
pred.mf$est <- c("Treatment", "Control")
pred.mf$est <- factor(pred.mf$est)
pred.mf$est <-  factor(pred.mf$est,levels(pred.mf$est)[c(2,1)])

willing.gr <- ggplot(pred.mf,aes(x=est,y=X1,colour=est)) +
  geom_errorbar(aes(ymin=X2, ymax=X3), width=.1,position = position_dodge(width = 0.2), size = 0.4) +
  geom_point(position = position_dodge(width = 0.2),size=2) +
  labs(title="Willingness to bribe",x="",colour="",y="") +
  scale_y_continuous(limits=c(0.395-0.1,0.395+0.1)) +
  theme_minimal(base_size = 9) + scale_colour_grey(start=0.2, end=0.5) + 
  theme(legend.position="none", axis.text.x = element_text(size=7), axis.title.y = element_text(size=6, vjust = 1), 
        axis.text.y = element_text(size=5.5), plot.title = element_text(size=8))

# Combine graphs
treat.combined <- ggarrange(trust.gr, culture.gr, willing.gr,
                          ncol = 3, nrow = 1)
treat.combined <- annotate_figure(treat.combined, left = text_grob("Predicted probability",size = 7.5,rot = 90))
ggsave("figure3.tiff", width=16,height=6, units = "cm", dpi = 800, device = "tiff")


# TABLE 1, main manuscript

# Treatment table
willing.reg2 <- glm(crosswise.bribe~treatment, data = data2, family = "binomial")
bribe.reg2 <- lm_robust(bribeothers.combined~treatment, data = data2)

willing.reg2$coefficients[[1]] <- as.numeric(willing.reg$coefficients[[1]])
willing.reg2$coefficients[[2]] <- as.numeric(willing.reg$coefficients[[2]]) 

se <- list(summary(bribe.reg2)$std.error,
              summary(trust.reg)$coefficients[,2],
               summary(culture.reg)$coefficients[,2],
               summary(willing.reg)$coefficients[,2])

# Tablenotes were added using "threeparttable" in LaTeX
stargazer(bribe.reg, trust.reg, culture.reg, willing.reg2, 
          title="Effect of information treatment on outcome variables",
          align=TRUE, dep.var.labels.include = TRUE, 
          dep.var.labels = c("Bribe perception\n(others)","Interpersonal\ntrust",
                             "Not corruption\nculture","Willingness to\npay bribe"),
          omit.stat=c("LL","ser","f","rsq","aic","adj.rsq"), no.space=TRUE,
          covariate.labels = c("Treatment", "Constant"),
          notes = "Standard errors are shown in parentheses. Unstandardized coefficients. 
          Model 1 was estimated using OLS with robust standard errors.
          Model 2-3 were estimated using logistic regression. Model 4 shows estimates for the CM, 
          regressing the outcome variable on the treatment indicator. The model was estimated using 
          Maximum likelihood with a logit link function, using the R package RRreg (Heck and Moshagen 2018).
          $^{*}$p$<$0.05; $^{**}$p$<$0.01; $^{***}$p$<$0.001.", 
          notes.append = FALSE, notes.align = "l", column.sep.width = "-2pt", digits = 2,
          star.cutoffs = c(0.05,0.01,0.001), font.size = "footnotesize", model.names = F, se = se)

#####################################################################################################################
# Heterogeneous treatment
#####################################################################################################################
# FIGURE 4, main manuscript

# Function: interaction data
flexint.2 <- function(data, X, treat) {
  df.int <- data
  df.int$X <- X
  df.int$D <- treat
  quant <- quantile(df.int$X, 1/2, na.rm = T)
  
  m1 <- median(df.int$X[df.int$X<quant[[1]]], na.rm = T)
  m2 <- median(df.int$X[df.int$X>=quant[[1]]], na.rm = T)
  
  df.int$G1 <- 0
  df.int$G1[df.int$X<quant[[1]]] <- 1
  df.int$G2 <- 0
  df.int$G2[df.int$X>=quant[[1]]] <- 1
  
  df.int$X1 <- (df.int$X-m1)*df.int$G1
  df.int$X2 <- (df.int$X-m2)*df.int$G2
  
  df.int$D.G1 <- df.int$D*df.int$G1
  df.int$D.G2 <- df.int$D*df.int$G2
  
  df.int$X1.G1.D <- df.int$X1*df.int$G1*df.int$D
  df.int$X2.G2.D <- df.int$X2*df.int$G2*df.int$D
  
  return(df.int)
}

# Function: interaction log
flexintreg.2 <- function(df, Y){
  reg <- glm(Y~G1+G2+
               X1+X2+
               D.G1+D.G2+
               X1.G1.D+X2.G2.D-1, 
             data = df, family = "binomial")
  return(reg)
}

# Function: interaction prepost diff
flexintreg.3 <- function(df, Y){
  reg <- lm(Y~G1+G2+
              X1+X2+-1, 
            data = df)
  return(reg)
}

# Function: interaction RRlog
flexintreg.4 <- function(df, Y){
  reg <- RRlog(Y~G1+G2+
                 X1+X2+
                 D.G1+D.G2+
                 X1.G1.D+X2.G2.D-1, 
               data = df, model = "Crosswise", p=0.25)
  return(reg)
}

# Function: interaction prepost diff - robust se
flexintreg.5 <- function(df, Y){
  reg <- lm_robust(Y~G1+G2+
              X1+X2+-1, 
            data = df)
  return(reg)
}

# Intdata
intdata <- flexint.2(data2, data2$bribeothers, data2$treatment)

# Intreg: trust
quantile(data2$bribeothers, c(.25, .75), na.rm = T)

intreg.hist.gr  <- ggplot(data2, aes(x=bribeothers)) +
  geom_histogram(aes(y = (..count..)/sum(..count..)),
                 colour="black", fill="grey20", alpha = 0.75, 
                 breaks = c(0,10,20,30,40,50,60,70,80,90,100)) +
  scale_x_continuous(breaks=c(0, 46.75, 50, 83, 100), 
                     labels = c("0", "25th\npercentile", "", "75th\npercentile", "100"))+ 
  labs(x="", y="Density", title = "Prior distribution") +
  theme_minimal(base_size = 11) +
  theme(legend.position="none", axis.text.x = element_text(size=9), axis.title.y = element_text(size=8, vjust = 1), 
        axis.text.y = element_text(size=10), plot.title = element_text(size=11)) +
  geom_vline(xintercept = 46.75, linetype = "longdash", size = .6) +
  geom_vline(xintercept = 83, linetype = "longdash", size = .6)

# Int graph, trust
intreg <- flexintreg.2(intdata, intdata$trust)

intreg.mf <- matrix(nrow = 2,ncol = 3)
intreg.mf[1,1] <- summary(intreg)$coefficients[5,1]
intreg.mf[1,2] <- summary(intreg)$coefficients[5,1]+1.96*summary(intreg)$coefficients[5,2]
intreg.mf[1,3] <- summary(intreg)$coefficients[5,1]-1.96*summary(intreg)$coefficients[5,2]
intreg.mf[2,1] <- summary(intreg)$coefficients[6,1]
intreg.mf[2,2] <- summary(intreg)$coefficients[6,1]+1.96*summary(intreg)$coefficients[6,2]
intreg.mf[2,3] <- summary(intreg)$coefficients[6,1]-1.96*summary(intreg)$coefficients[6,2]
intreg.mf <- data.frame(intreg.mf)
intreg.mf$prior <- c("Low prior\n25th pctl", "High prior\n75th pctl")
intreg.mf$prior <- factor(intreg.mf$prior)
intreg.mf$prior <-  factor(intreg.mf$prior,levels(intreg.mf$prior)[c(2,1)])

intreg.trust.gr <- ggplot(intreg.mf,aes(x=prior,y=X1,colour=prior)) +
  geom_errorbar(aes(ymin=X2, ymax=X3), width=.1,position = position_dodge(width = 0.2), size = 0.4) +
  geom_point(position = position_dodge(width = 0.2),size=2) +
  labs(title="Interpersonal trust",x="",colour="",y="Estimate") +
  scale_y_continuous(limits=c(-1.1,0.9)) +
  theme_minimal(base_size = 11) + scale_colour_grey(start=0.2, end=0.5) + 
  theme(legend.position="none", axis.text.x = element_text(size=9), axis.title.y = element_text(size=8, vjust = 1), 
        axis.text.y = element_text(size=10), plot.title = element_text(size=11)) +
  geom_hline(aes(yintercept = 0), colour = "black", linetype ="longdash", size = .4, alpha = .3) 

# Int graph, culture
intreg <- flexintreg.2(intdata, intdata$not.corrculture)

intreg.mf <- matrix(nrow = 2,ncol = 3)
intreg.mf[1,1] <- summary(intreg)$coefficients[5,1]
intreg.mf[1,2] <- summary(intreg)$coefficients[5,1]+1.96*summary(intreg)$coefficients[5,2]
intreg.mf[1,3] <- summary(intreg)$coefficients[5,1]-1.96*summary(intreg)$coefficients[5,2]
intreg.mf[2,1] <- summary(intreg)$coefficients[6,1]
intreg.mf[2,2] <- summary(intreg)$coefficients[6,1]+1.96*summary(intreg)$coefficients[6,2]
intreg.mf[2,3] <- summary(intreg)$coefficients[6,1]-1.96*summary(intreg)$coefficients[6,2]
intreg.mf <- data.frame(intreg.mf)
intreg.mf$prior <- c("Low prior\n25th pctl", "High prior\n75th pctl")
intreg.mf$prior <- factor(intreg.mf$prior)
intreg.mf$prior <-  factor(intreg.mf$prior,levels(intreg.mf$prior)[c(2,1)])

intreg.culture.gr <- ggplot(intreg.mf,aes(x=prior,y=X1,colour=prior)) +
  geom_errorbar(aes(ymin=X2, ymax=X3), width=.1,position = position_dodge(width = 0.2), size = 0.4) +
  geom_point(position = position_dodge(width = 0.2),size=2) +
  labs(title="Corruption not part of culture",x="",colour="",y="Estimate") +
  scale_y_continuous(limits=c(-1.1,0.9)) +
  theme_minimal(base_size = 11) + scale_colour_grey(start=0.2, end=0.5) + 
  theme(legend.position="none", axis.text.x = element_text(size=9), axis.title.y = element_text(size=8, vjust = 1), 
        axis.text.y = element_text(size=10), plot.title = element_text(size=11)) +
  geom_hline(aes(yintercept = 0), colour = "black", linetype ="longdash", size = .4, alpha = .3) 

# Int graph, willingness
intreg <- flexintreg.4(intdata, intdata$crosswise.bribe)

intreg.mf <- matrix(nrow = 2,ncol = 3)
intreg.mf[1,1] <- summary(intreg)$coefficients[5,1]
intreg.mf[1,2] <- summary(intreg)$coefficients[5,1]+1.96*summary(intreg)$coefficients[5,2]
intreg.mf[1,3] <- summary(intreg)$coefficients[5,1]-1.96*summary(intreg)$coefficients[5,2]
intreg.mf[2,1] <- summary(intreg)$coefficients[6,1]
intreg.mf[2,2] <- summary(intreg)$coefficients[6,1]+1.96*summary(intreg)$coefficients[6,2]
intreg.mf[2,3] <- summary(intreg)$coefficients[6,1]-1.96*summary(intreg)$coefficients[6,2]
intreg.mf <- data.frame(intreg.mf)
intreg.mf$prior <- c("Low prior\n25th pctl", "High prior\n75th pctl")
intreg.mf$prior <- factor(intreg.mf$prior)
intreg.mf$prior <-  factor(intreg.mf$prior,levels(intreg.mf$prior)[c(2,1)])

intreg.willing.gr <- ggplot(intreg.mf,aes(x=prior,y=X1,colour=prior)) +
  geom_errorbar(aes(ymin=X2, ymax=X3), width=.1,position = position_dodge(width = 0.2), size = 0.4) +
  geom_point(position = position_dodge(width = 0.2),size=2) +
  labs(title="Willingness to bribe",x="",colour="",y="Estimate") +
  scale_y_continuous(limits=c(-1.1,0.9)) +
  theme_minimal(base_size = 11) + scale_colour_grey(start=0.2, end=0.5) + 
  theme(legend.position="none", axis.text.x = element_text(size=9), axis.title.y = element_text(size=8, vjust = 1), 
        axis.text.y = element_text(size=10), plot.title = element_text(size=11)) +
  geom_hline(aes(yintercept = 0), colour = "black", linetype ="longdash", size = .4, alpha = .3) 

# Combine interaction graphs
int.combined <- ggarrange(intreg.trust.gr, intreg.culture.gr, intreg.willing.gr, intreg.hist.gr,
                          ncol = 2, nrow = 2)
ggsave("figure4.tiff", dpi = 800, device = "tiff")


# TABLE 1, Appendix D: compare models, estimates + table
bribe.comp <- lm(prepost.diff~treatment,
                  data = data2[!is.na(data2$prepost.diff),])
bribe.int <- flexintreg.3(intdata, intdata$prepost.diff)
bribe.aic.diff <- AIC(bribe.int)-AIC(bribe.comp)

bribe.comp.robust <- lm_robust(prepost.diff~treatment,
                 data = data2[!is.na(data2$prepost.diff),])
bribe.int.robust <- flexintreg.5(intdata, intdata$prepost.diff)

trust.comp <- glm(trust~treatment,
            data = data2[!is.na(data2$bribeothers),], family = "binomial")
trust.int <- flexintreg.2(intdata, intdata$trust)
trust.aic.diff <- AIC(trust.int)-AIC(trust.comp)

culture.comp <- glm(not.corrculture~treatment,
            data = data2[!is.na(data2$bribeothers),], family = "binomial")
culture.int <- flexintreg.2(intdata, intdata$not.corrculture)
culture.aic.diff <- AIC(culture.int)-AIC(culture.comp)

willing.comp <- RRlog(crosswise.bribe~treatment, 
              data = data2[!is.na(data2$bribeothers),], model = "Crosswise", p=0.25)
willing.int <- flexintreg.4(intdata, intdata$crosswise.bribe)
willing.int2 <- flexintreg.2(intdata, intdata$crosswise.bribe)
willing.aic.diff <- (-2*willing.int$logLik+2*8)-(-2*willing.comp$logLik+2*2)

willing.int2$coefficients <- willing.int$coefficients

se <- list(bribe.int.robust$std.error,
           summary(trust.int)$coefficients[,2],
           summary(culture.int)$coefficients[,2],
           summary(willing.int)$coefficients[,2])

# Tablenotes were added using "threeparttable" in LaTeX
stargazer(bribe.int, trust.int, culture.int, willing.int2, 
          title="Heterogeneous treatment effects, interaction models",
          align=TRUE, dep.var.labels.include = TRUE, 
          dep.var.labels= c("Updating (post - pre)", "Interpersonal trust", 
                             "Not corruption culture","Willingness to pay bribe"),
          omit.stat=c("LL","ser","f","rsq","aic","adj.rsq"), no.space=TRUE,
          covariate.labels = c("Low group", "High group", "Prior x Low", "Prior x High",
                               "Treat x Low", "Treat x High", "Prior x Treat x Low", "Prior x Treat x High"),
          add.lines=list(c("$Delta AIC$ compared to base model", round(bribe.aic.diff,2),
                           round(trust.aic.diff,2), round(culture.aic.diff,2), round(willing.aic.diff,2))),
          notes = "Standard errors are shown in parentheses. Unstandardized coefficients. 
          The interaction model is based on the binning estimator
          in Hainmueller et al. (2019). `Low group' and `High group' denotes respondents with priors below and above 
          the median prior, respectively. `Prior' is the bribe beliefs variable centered on the median within each bin.
          `Treat' is the information treatment. Model 1 was estimated with OLS and robust 
          standard errors with respondents' pre-post change in bribery beliefs as the dependent variable. Models 2-3
          were estimated using logistic regression. Model 4 was estimated using Maximum likelihood estimation 
          with a logit link function, using the R package RRreg (Heck and Moshagen 2018). `Base model' is the
          default model regressing a specific outcome variable on a treatment indicator. The difference in AIC 
          refers to the AIC for the interaction model minus the AIC for the base model. 
          $^{*}$p$<$0.05; $^{**}$p$<$0.01; $^{***}$p$<$0.001.", 
          notes.append = FALSE, notes.align = "l", column.sep.width = "-2pt", digits = 2, model.names = F, se = se,
          star.cutoffs = c(0.05,0.01,0.001), font.size = "footnotesize")

#####################################################################################################################
# Predict prior and updating
#####################################################################################################################
# TABLE 2, Appendix D

# Estimates
prior <- lm(bribeothers~female+university.ed+large.city+log.income+age+I(age^2), data = intdata)
prior.group <- lm(G2~female+university.ed+large.city+log.income+age+I(age^2), 
                  data = intdata[!is.na(intdata$bribeothers),])
update <- lm(prepost.diff~female+university.ed+large.city+log.income+age+I(age^2), data = intdata)

# Tablenotes were added using "threeparttable" in LaTeX
stargazer(prior, prior.group, update, title="Predicting prior and degree of updating",
          align=TRUE, dep.var.labels.include = TRUE, 
          dep.var.labels = c("Bribe perception (others)","P(High prior)",
                             "Updating (post - pre)"),
          omit.stat=c("LL","ser","f","rsq","aic"), no.space=TRUE,
          covariate.labels = c("Female", "University ed.", "Large city", "Log income", "Age", "Age$^2$"),
          notes = "All models were estimated with OLS and robust standard errors (shown in parentheses).
          The dependent variable in model 1 is respondents' pretreatment perception of others' bribery beliefs, 
          while the same variable in model 2 is whether a respondent's prior is above the sample median (1)
          or not (0). The dependent variable in model 3 indicates the amount of updating for respondents in the 
          treatment group by taking posttreatment bribery beliefs minus pretreatment bribery beliefs. The covariate 
          `Large city' indicates whether respondents reported that they live in a city with more than 1,000 000 
          inhabitants and the covariate `Log income' indicates the natural logarithm of a respondent's reported
          household income. $^{*}$p$<$0.05; $^{**}$p$<$0.01; $^{***}$p$<$0.001.", 
          notes.append = FALSE, notes.align = "l", column.sep.width = "10", digits = 2,
          star.cutoffs = c(0.05,0.01,0.001), font.size = "footnotesize", se = starprep(prior, prior.group, update))

#####################################################################################################################
# Other
#####################################################################################################################
# TABLE 4, APpendix D: descriptives (sample)
descriptives <- data2[,c("female","university.ed","large.city","age","house.income")]
stargazer(descriptives, summary.stat = c("mean","sd","min","max","n"), 
          covariate.labels = c("Female respondent","University degree",
                               "Large city (over 1,000 000 inhabitants)",
                               "Age","Household income (pesos per month, pre-tax)"),
          digits = 2,
          title = "Sample characteristics", 
          notes = "Note: Some extreme (probably miscoded) outliers were excluded from the `Household income' variable.", 
          notes.append = TRUE)

median(descriptives$house.income, na.rm = TRUE)
median(descriptives$age, na.rm = TRUE)
IQR(descriptives$house.income, na.rm = TRUE)

# TABLE 6: Covariate balance
descriptives <- subset(data2, treatment == 1, select = 
                         c("female","university.ed","large.city","age","house.income","treatment"))

stargazer(descriptives, summary.stat = c("mean","sd","n"), 
          covariate.labels = c("Female respondent","University degree",
                               "Large city (over 1,000 000 inhabitants)",
                               "Age","Household income (pesos per month, pre-tax)"),
          digits = 2,
          title = "Sample characteristics", 
          notes = "Note: Some extreme (probably miscoded) outliers were excluded from the `Household income' variable.", 
          notes.append = TRUE)

descriptives <- subset(data2, treatment == 0, select = 
                         c("female","university.ed","large.city","age","house.income","treatment"))

stargazer(descriptives, summary.stat = c("mean","sd","n"), 
          covariate.labels = c("Female respondent","University degree",
                               "Large city (over 1,000 000 inhabitants)",
                               "Age","Household income (pesos per month, pre-tax)"),
          digits = 2,
          title = "Sample characteristics", 
          notes = "Note: Some extreme (probably miscoded) outliers were excluded from the `Household income' variable.", 
          notes.append = TRUE)

# Predicting assignment
descriptives <- data2[,c("female","university.ed","large.city","age","house.income","treatment")]
descriptives <- descriptives[complete.cases(descriptives),]
mod1 <- lm(treatment~1, data = descriptives)
mod2 <- lm(treatment~female+university.ed+large.city+log(house.income+1)+age, data = descriptives)

anova(mod1, mod2)

# Section D.4: Descriptive statistics and generalizability, treatment heterogeneity
summary(glm(trust~treatment+university.ed+treatment:university.ed, data = data2, family = "binomial"))
summary(glm(not.corrculture~treatment+university.ed+treatment:university.ed, data = data2, family = "binomial"))
summary(RRlog(crosswise.bribe~treatment+university.ed+treatment:university.ed, data = data2,
              model = "Crosswise", p=0.25))

summary(glm(trust~treatment+large.city+treatment:large.city, data = data2, family = "binomial"))
summary(glm(not.corrculture~treatment+large.city+treatment:large.city, data = data2, family = "binomial"))
summary(RRlog(crosswise.bribe~treatment+large.city+treatment:large.city, data = data2,
              model = "Crosswise", p=0.25))

summary(glm(trust~treatment+log(house.income+1)+treatment:log(house.income+1), data = data2, family = "binomial"))
summary(glm(not.corrculture~treatment+log(house.income+1)+treatment:log(house.income+1), data = data2, family = "binomial"))
summary(RRlog(crosswise.bribe~treatment+log(house.income+1)+treatment:log(house.income+1), data = data2,
              model = "Crosswise", p=0.25))

summary(glm(trust~treatment+age+treatment:age, data = data2, family = "binomial"))
summary(glm(not.corrculture~treatment+age+treatment:age, data = data2, family = "binomial"))
summary(RRlog(crosswise.bribe~treatment+age+treatment:age, data = data2,
              model = "Crosswise", p=0.25))

